import sys
import os
def Contrast(path1, path2, output_path='Contrast.txt'):
    """Write the unique lines of ``path2`` to ``output_path``.

    Trailing whitespace (including the newline) is removed from every line
    before duplicates are dropped. The first occurrence of each line is kept
    and the order of the input file is preserved, so the output is the same
    on every run.

    Args:
        path1: Not read by this function. Retained so that existing
            two-argument calls keep working.
        path2: Path of the UTF-8 text file to de-duplicate.
        output_path: Destination file, written as UTF-8 with one entry per
            line. Defaults to ``'Contrast.txt'`` relative to the current
            working directory.
    """
    with open(path2, encoding='utf-8') as source:
        # dict keys are unique and keep insertion order; iterating a set of
        # strings instead yields an order that changes between interpreter
        # runs because of hash randomisation.
        unique_lines = dict.fromkeys(line.rstrip() for line in source)

    with open(output_path, 'w', encoding='utf-8') as target:
        target.writelines(line + '\n' for line in unique_lines)


def sort_word(src_path='Contrast.txt', dst_path='test.txt'):
    """Copy the lines of ``src_path`` to ``dst_path``, longest first.

    Trailing whitespace (including the newline) is stripped from every line
    before lengths are compared, so the ordering reflects the text that is
    actually written out. Lines of equal length keep the relative order they
    had in the source file.

    Args:
        src_path: UTF-8 text file to read. Defaults to ``'Contrast.txt'``
            relative to the current working directory.
        dst_path: UTF-8 text file to write, one entry per line. Defaults to
            ``'test.txt'`` relative to the current working directory.
    """
    with open(src_path, 'r', encoding='utf-8') as source:
        words = [line.rstrip() for line in source]

    # Sort on the stripped text: measuring the raw line would count trailing
    # blanks and penalise a final line that has no newline. list.sort is
    # stable even with reverse=True, so ties stay in file order.
    words.sort(key=len, reverse=True)

    with open(dst_path, 'w', encoding='utf-8') as target:
        target.writelines(word + '\n' for word in words)



if __name__ == '__main__':
    # Hard-coded locations of the two word-list files handed to Contrast.
    path1, path2 = (
        r"F:\kechuang_project\word\demoword.txt",
        r"F:\kechuang_project\word\key_word.txt",
    )

    # Step 1: de-duplicate the keyword file (output goes to Contrast.txt).
    Contrast(path1, path2)

    # Step 2: reorder Contrast.txt by line length (output goes to test.txt).
    sort_word()